#!/usr/bin/env ruby

# Copyright (c) 2023-2024 Huawei Device Co., Ltd.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# MemmoveInterposer: thin out-of-line wrapper around the library `memmove`.
# Keeping the library call in its own function means the caller does not
# have to carry the prologue needed for a regular call.
#   dst  - destination address
#   src  - source address
#   size - number of bytes to move
function(
    :MemmoveInterposer,
    params: {
        dst: 'ptr',
        src: 'ptr',
        size: 'u32'
    },
    mode: [:FastPath]
) do
    # Forward the three arguments unchanged; the returned pointer is not used.
    Call(dst, src, size).Method("memmove").ptr
    ReturnVoid().void
end

# Register set handed to the register allocator of TryBigCopy
# (it is passed there as `regalloc_set`).
# Only the `$panda_mask` assignment is active. The commented-out lines are a
# disabled variant that would build a dedicated mask on 64-bit targets.
# NOTE(review): confirm whether the disabled variant is still needed or can be dropped.
#if Options.arch_64_bits?
#    $regmask = RegMask.new($full_regmap, :tmp0, :tmp1, :arg0, :arg1, :arg2, :arg3, :arg4)
#else
    $regmask = $panda_mask
#end
# TryBigCopy: copies `len` bytes from `src_data` to `dst_data`, choosing the
# copy direction so that overlapping regions are handled correctly.
#
# inputs:
#   dst_data - destination address (ptr)
#   src_data - source address (ptr)
#   len      - length in bytes (u32)
#
# Strategy:
#   * len > 4096          - tail call to MemmoveInterposer (system routine)
#   * 64 <= len <= 4096   - 48-byte (6 x u64) unrolled loop plus a 48-byte edge block
#   * len < 64            - 8-byte loop plus one extra 8-byte word at the far edge
# All accesses are 8-byte words; the edge words/blocks may overlap the words
# written by the loops, which is how lengths that are not a multiple of the
# step are covered.
# NOTE(review): the small paths always touch the words at offsets 0 and len - 8,
# so they appear to require len >= 8 - confirm against the callers.
function(:TryBigCopy,
          params: {dst_data: 'ptr', src_data: 'ptr', len: 'u32'},
          regalloc_set: $regmask,
          regmap: $full_regmap,
          mode: [:FastPath]) {

    # Arm32 is not supported: emit only an UNREACHABLE terminator and leave
    # the block early so that nothing else is generated.
    if Options.arch == :arm32
        Intrinsic(:UNREACHABLE).void.Terminator
        next
    end

    # len_bytes is reused as a 32-bit index bound; len_word is the same value
    # cast to word type for pointer arithmetic and pointer-sized comparisons.
    len_bytes := len
    len_word := Cast(len_bytes).word

    # if bytes size > 4096 bytes, copy via system routine
    compare := Compare(len_bytes, 4096).GT.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        # Pin the three arguments to the arg0..arg2 registers and tail call
        # the MemmoveInterposer entrypoint.
        LiveOut(dst_data).DstReg(regmap[:arg0]).ptr
        LiveOut(src_data).DstReg(regmap[:arg1]).ptr
        LiveOut(len_bytes).DstReg(regmap[:arg2]).u32
        ep_offset = get_entrypoint_offset("MEMMOVE_INTERPOSER")
        Intrinsic(:TAIL_CALL).AddImm(ep_offset).MethodAsImm("MemmoveInterposer").Terminator.void
        # Emitted only when defines.DEBUG is set.
        Intrinsic(:UNREACHABLE).Terminator.void if defines.DEBUG
    }

    # Direction selection.
    # Source above destination (unsigned compare): copy forward.
    If(src_data, dst_data).CC(:CC_A).b {
        Goto(:ForwardCopy)
    }

    # Destination is at or above the source. If the distance between them is
    # at least `len`, a forward copy cannot overwrite unread source bytes.
    gap := Sub(dst_data, src_data).word
    If(gap, len_word).AE.b {
        Goto(:ForwardCopy)
    }

    # BackwardCopy: destination starts inside the source range, so copy from
    # the highest address down.
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:BackwardCopy64)
    }

    # Small backward copy: start with the last 8-byte word (offset len - 8).
    # The word for the current offset is always loaded one step ahead of its store.
    idx := Sub(len_bytes, 8).i32
    buf := Load(src_data, idx).u64

Label(:BackwardCopyLoop)
    # Loop-carried offset and pending word (initial value, value from the previous iteration).
    idx := Phi(idx, idx_prev).i32
    buf := Phi(buf, buf_prev).u64
    # Once the offset is within the first 8 bytes' reach, finish in the tail.
    compare := Compare(idx, 8).LE.b
    IfImm(compare).Imm(0).NE.Unlikely.b {
        Goto(:BackwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_prev := Sub(idx, 8).i32
    buf_prev := Load(src_data, idx_prev).u64
    Goto(:BackwardCopyLoop)

Label(:BackwardCopyTail)
    # Load the first source word before the pending store: that store may
    # overlap the first 8 source bytes when the regions overlap.
    buf1 := LoadI(src_data).Imm(0).u64
    Store(dst_data, idx, buf).u64
    StoreI(dst_data, buf1).Imm(0).u64
    Goto(:End)

Label(:BackwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # Save the first 48 source bytes up front; they are written last, in
    # BackwardCopyTail64, after the loop has handled everything above them.
    first_buf0 := LoadI(src_data).Imm(0).u64
    first_buf1 := LoadI(src_data).Imm(8).u64
    first_buf2 := LoadI(src_data).Imm(16).u64
    first_buf3 := LoadI(src_data).Imm(24).u64
    first_buf4 := LoadI(src_data).Imm(32).u64
    first_buf5 := LoadI(src_data).Imm(40).u64
    # Start at the last 48-byte block (offset len - 48) of both regions.
    len_tail := SubI(len_word).Imm(48).word
    src_ptr_init := Add(src_data, len_tail).ptr
    dst_ptr_init := Add(dst_data, len_tail).ptr

Label(:BackwardCopyLoop64)
    # Loop-carried block pointers (initial value, value from the previous iteration).
    src_ptr := Phi(src_ptr_init, prev_src_ptr).ptr
    dst_ptr := Phi(dst_ptr_init, prev_dst_ptr).ptr

    # All six loads are issued before any store of the same block.
    buf5 := LoadI(src_ptr).Imm(40).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf0 := LoadI(src_ptr).Imm(0).u64

    StoreI(dst_ptr, buf5).Imm(40).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf0).Imm(0).u64

    # Step down by one block; stop when the next block would begin below the
    # start of the source (unsigned compare).
    prev_src_ptr := SubI(src_ptr).Imm(48).ptr
    prev_dst_ptr := SubI(dst_ptr).Imm(48).ptr
    If(prev_src_ptr, src_data).CC(:CC_B).Unlikely.b {
        Goto(:BackwardCopyTail64)
    } Else {
        Goto(:BackwardCopyLoop64)
    }

Label(:BackwardCopyTail64)
    # Write the saved first 48 bytes; this covers whatever the loop left
    # uncopied at the beginning of the region.
    StoreI(dst_data, first_buf5).Imm(40).u64
    StoreI(dst_data, first_buf4).Imm(32).u64
    StoreI(dst_data, first_buf3).Imm(24).u64
    StoreI(dst_data, first_buf2).Imm(16).u64
    StoreI(dst_data, first_buf1).Imm(8).u64
    StoreI(dst_data, first_buf0).Imm(0).u64
    Goto(:End)

Label(:ForwardCopy)
    compare := Compare(len_bytes, 64).GE.b
    IfImm(compare).Imm(0).NE.b {
        Goto(:ForwardCopy64)
    }

    # ForwardCopy
    # Small forward copy. From here on len_bytes holds the loop bound
    # (len - 16): the loop stores a word while idx is below that bound.
    # The word for the current offset is always loaded one step ahead of its store.
    len_bytes := Sub(len_bytes, 16).i32
    idx := 0
    buf := Load(src_data, idx).u64

Label(:ForwardCopyLoop)
    # Loop-carried offset and pending word (initial value, value from the previous iteration).
    idx := Phi(idx, idx_next).i32
    buf := Phi(buf, buf_next).u64
    If(idx, len_bytes).GE.Unlikely.b {
        Goto(:ForwardCopyTail)
    }
    Store(dst_data, idx, buf).u64
    idx_next := Add(idx, 8).i32
    buf_next := Load(src_data, idx_next).u64
    Goto(:ForwardCopyLoop)

Label(:ForwardCopyTail)
    # len_bytes becomes len - 8, the offset of the last 8-byte word.
    # That word is loaded before the pending store, which may overlap it
    # when the regions overlap.
    len_bytes := Add(len_bytes, 8).i32
    buf1 := Load(src_data, len_bytes).u64
    Store(dst_data, idx, buf).u64
    Store(dst_data, len_bytes, buf1).u64

    Goto(:End)

Label(:ForwardCopy64)
    # This is naive x6 loop unroll for 64+ bytes
    # TODO (asidorov): remove when LoopUnroll pass added in Irtoc
    # Save the last 48 source bytes (starting at offset len - 48) up front;
    # they are written last, in ForwardCopyTail64.
    len_tail := SubI(len_word).Imm(48).word
    src_end_data := Add(src_data, len_tail).ptr
    last_buf0 := LoadI(src_end_data).Imm(0).u64
    last_buf1 := LoadI(src_end_data).Imm(8).u64
    last_buf2 := LoadI(src_end_data).Imm(16).u64
    last_buf3 := LoadI(src_end_data).Imm(24).u64
    last_buf4 := LoadI(src_end_data).Imm(32).u64
    last_buf5 := LoadI(src_end_data).Imm(40).u64

Label(:ForwardCopyLoop64)
    # Loop-carried block pointers, starting at the beginning of both regions.
    src_ptr := Phi(src_data, next_src_ptr).ptr
    dst_ptr := Phi(dst_data, next_dst_ptr).ptr

    # All six loads are issued before any store of the same block.
    buf0 := LoadI(src_ptr).Imm(0).u64
    buf1 := LoadI(src_ptr).Imm(8).u64
    buf2 := LoadI(src_ptr).Imm(16).u64
    buf3 := LoadI(src_ptr).Imm(24).u64
    buf4 := LoadI(src_ptr).Imm(32).u64
    buf5 := LoadI(src_ptr).Imm(40).u64

    StoreI(dst_ptr, buf0).Imm(0).u64
    StoreI(dst_ptr, buf1).Imm(8).u64
    StoreI(dst_ptr, buf2).Imm(16).u64
    StoreI(dst_ptr, buf3).Imm(24).u64
    StoreI(dst_ptr, buf4).Imm(32).u64
    StoreI(dst_ptr, buf5).Imm(40).u64

    # Step up by one block; stop once the next block would start at or past
    # the saved last block.
    next_src_ptr := AddI(src_ptr).Imm(48).ptr
    next_dst_ptr := AddI(dst_ptr).Imm(48).ptr
    If(next_src_ptr, src_end_data).AE.Unlikely.b {
        Goto(:ForwardCopyTail64)
    } Else {
        Goto(:ForwardCopyLoop64)
    }

Label(:ForwardCopyTail64)
    # Write the saved last 48 bytes at the matching destination offset; this
    # covers whatever the loop left uncopied at the end of the region.
    dst_end_data := Add(dst_data, len_tail).ptr

    StoreI(dst_end_data, last_buf0).Imm(0).u64
    StoreI(dst_end_data, last_buf1).Imm(8).u64
    StoreI(dst_end_data, last_buf2).Imm(16).u64
    StoreI(dst_end_data, last_buf3).Imm(24).u64
    StoreI(dst_end_data, last_buf4).Imm(32).u64
    StoreI(dst_end_data, last_buf5).Imm(40).u64
    ### End copy

Label(:End)
    ReturnVoid().void
}

